********************************************************************************
* This file builds the data analysis files for "Bidding for Firms" Slattery (2024)
* The main output files are data_analysis.dta and data_analysis_long.dta.
* There are many input files, which are described in the README
********************************************************************************
clear all
set more off

********************************************************************************
* ENVIRONMENT
********************************************************************************

* Directory globals. Every path below is derived from the user global, so a
* replicator only needs to edit that first line to relocate the project.
global user 	 "/Users/cailin.slattery/Dropbox"
global jm_root	 "$user/Incentives/postJM/Replication"
global datadir	 "$jm_root/data"
global dodir	 "$jm_root/code"
global draftdir    "$jm_root/draft"
global appxdir    "$jm_root/draft/appendix"
global cbpdir    "$jm_root/data/raw/cbp"
* Stata's temporary directory for the current session.
global temp 	 "`c(tmpdir)'"

* Common graph options (white plot and graph regions) and the default scheme.
global gpr = "plotregion(color(white) margin(small)) graphregion(color(white))"
set scheme s1mono

* Install the user-written packages only when they are not already on the
* adopath. An unconditional ssc install contacts the SSC archive on every run
* and stops with an error if the installed files differ from the archive copy.
foreach pkg in carryforward winsor2 {
	capture which `pkg'
	if _rc ssc install `pkg'
}

* Path is quoted so that a project root containing spaces still resolves.
do "$dodir/adjust_inflation"

* Fixed seed so that any random draws in the build are reproducible.
set seed 112724

********************************************************************************
* HELPER FILES
********************************************************************************
**** crosswalk to map counties to commuting zones (cz) ****
* Reads the county/commuting-zone spreadsheet and saves cty2cz.dta with one row
* per county fips code. Paths are quoted so directories with spaces still work.
import excel using "$datadir/crosswalk/cz00_eqv_v1.xls", firstrow case(l) clear
keep fips countyname commutingzoneid2000
isid fips // stop if fips does not uniquely identify the rows

* Split the fips string into numeric state and county codes.
* NOTE(review): assumes fips is a 5-character string (2-digit state followed by
* 3-digit county) -- confirm against the raw spreadsheet.
gen statefip = substr(fips, 1, 2)
gen countyfip = substr(fips, 3, 3)
destring statefip countyfip, replace
rename countyfip fipscty

* Normalize county names: drop the " County"/" Parish" suffixes, write "St. "
* as "St ", lowercase everything, and relabel "new york" as "new york city".
replace countyname = subinstr(countyname, " County", "", .)
replace countyname = subinstr(countyname, "St. ", "St ", .)
replace countyname = subinstr(countyname, " Parish", "", .)
replace countyname = lower(countyname)
replace countyname = "new york city" if countyname == "new york"

save "$datadir/cty2cz", replace

*** industries and years in the sample ****
* Builds the list of distinct (naics4, year) pairs found in the cleaned subsidy
* spreadsheet, with year shifted back by one. Paths are quoted so directories
* with spaces still work.
import excel using "$datadir/raw/mega_threat_clean.xlsx", firstrow clear
keep naics4 year
replace year = year - 1 // going to use industry variables in t-1
duplicates drop
save "$datadir/sample_year_naics", replace

********************************************************************************
* DATA BUILD
********************************************************************************

* Run the build scripts in order. Paths are quoted so that a project root
* containing spaces still resolves. The IPUMS step below is left disabled, as in
* the original script.
*do $dodir/process_ipums.do // process raw data from ipums, make demographic and occupation files (make_demo.do and make_occ.do)
do "$dodir/build_cbp.do" // process county business patterns data
do "$dodir/make_industry.do" // create wage and employment variables at the industry-county level
do "$dodir/make_county.do" // compiles location characteristics from a variety of sources
do "$dodir/make_subsidy.do" // cleans the raw subsidy data
do "$dodir/make_analysis.do" // takes all of the outputs from the files above to create analysis data set
